# Input: statewide PFIRS prescribed-burn records pulled from the PFIRS system.
# Previous (superseded) pull, kept for reference:
# file <- paste0(ref_path, "/PFIRS/PFIRS 2019-2020 CT pulled 2022.xlsx")
file <- paste0(ref_path, "/PFIRS/PFIRS 2017-2022 pulled 2023.xlsx")
# Extract the pull year from the file name by pattern ("pulled YYYY.xlsx")
# rather than by fixed character positions, so the code keeps working if the
# file name length changes.
pulled_year <- sub(".*pulled (\\d{4})\\.xlsx$", "\\1", file)
filter_year <- 2019
min_year <- 2017
max_year <- 2022
print(paste("data was pulled in", pulled_year))
## [1] "data was pulled in 2023"
# The warning below is expected: one record has the literal text 'NA' in the
# date column (it is dropped by the year filter further down).
xy <- read_excel(file)
## Warning: Expecting date in A8155 / R8155C1: got 'NA'
# Quick look at the imported structure: one row per reported burn, with
# date, unit name, agency, acreage, coordinates, fuel, tonnage, burn type.
head(xy)
## # A tibble: 6 × 9
## `Burn Date` `Burn Unit` Agency `Acres Burned` Latitude Longitude
## <dttm> <chr> <chr> <dbl> <dbl> <dbl>
## 1 2017-01-02 00:00:00 PP Ridgetop Unit Cal Fi… 10 38.8 -123.
## 2 2017-01-03 00:00:00 Fairway/Bunker Califo… 2.5 39.2 -120.
## 3 2017-01-03 00:00:00 PP Ridgetop Unit Cal Fi… 10 38.8 -123.
## 4 2017-01-03 00:00:00 FIA Forest hill Privat… 10 39.0 -120.
## 5 2017-01-03 00:00:00 28A US For… 51 37.7 -119.
## 6 2017-01-04 00:00:00 Plum 52 US For… 1 39.5 -121.
## # ℹ 3 more variables: `Fuel Type` <chr>, `Total Tons` <chr>, `Burn Type` <chr>
# Replace the spaces in the imported column names with underscores so the
# columns can be referenced downstream without backticks.
xy <- xy %>%
  rename(
    Burn_Date    = `Burn Date`,
    Burn_Unit    = `Burn Unit`,
    Acres_Burned = `Acres Burned`,
    Fuel_Type    = `Fuel Type`,
    Total_Tons   = `Total Tons`,
    Burn_Type    = `Burn Type`
  )
# Clean the coordinates: drop records whose longitude is the literal "UNK",
# coerce both coordinate columns to numeric, and force any positive
# longitudes into the western hemisphere (some records were entered with a
# missing minus sign; all of California is at negative longitude).
xy <- xy %>%
  filter(Longitude != "UNK") %>%
  mutate(
    Longitude = as.numeric(Longitude),
    Latitude  = as.numeric(Latitude)
  ) %>%
  mutate(Longitude = ifelse(Longitude > 0, -Longitude, Longitude))
# Derive a calendar-year column from the burn date (lubridate::year), then
# tabulate records per year: a few 2023 rows and one NA date slipped in.
xy <- xy %>% mutate(Year = year(Burn_Date))
summary(as.factor(xy$Year))
## 2017 2018 2019 2020 2021 2022 2023 NA's
## 855 1276 1324 2195 2504 2492 3 1
# Keep only records inside the analysis window [min_year, max_year] and
# report how many out-of-range records were removed (the 3 rows from 2023
# plus the 1 row with a missing date).
nrow_old <- nrow(xy)
xy <- xy %>%
  filter(Year <= max_year & Year >= min_year)
deleted_n <- nrow_old - nrow(xy)
# Fixed message: these rows are outside the year range, not duplicates
# (duplicate removal happens further down).
print(paste("Deleted", deleted_n, "rows outside the year range"))
## [1] "Deleted 4 rows outside the year range"
Duplicate-identification rule, from Jason Branz: “Typically if there are multiple records with the same date, burn name, and acres, it’s a duplicate.”
# Preview the rows that will be treated as duplicates under the rule above:
# project to the three identifying columns (plus Year for readability) and
# keep each row whose combination has already appeared. With the magrittr
# pipe, the `.` inside filter() refers to the selected tibble, so
# duplicated(.) is evaluated row-wise over all selected columns.
xy %>%
select(Year, Burn_Date, Burn_Unit, Acres_Burned) %>%
filter(duplicated(.))
## # A tibble: 398 × 4
## Year Burn_Date Burn_Unit Acres_Burned
## <dbl> <dttm> <chr> <dbl>
## 1 2017 2017-01-31 00:00:00 Pendola CreepyOregonHillPile Unit 20
## 2 2017 2017-02-01 00:00:00 Slapjack dozer piles unit 11 14
## 3 2017 2017-04-24 00:00:00 Delvan Pile 2
## 4 2017 2017-11-15 00:00:00 Challenge Landing Pile 1
## 5 2017 2017-11-15 00:00:00 2017-Airport East Side 1.5
## 6 2017 2017-11-15 00:00:00 Oregon Hill 2
## 7 2017 2017-11-27 00:00:00 49 Road Improvement Piles 10
## 8 2017 2017-11-30 00:00:00 Reserves Piles 0.25
## 9 2018 2018-01-17 00:00:00 Griff 1002 6
## 10 2018 2018-01-17 00:00:00 Twin Peaks 59 20
## # ℹ 388 more rows
# Remove duplicates per the rule above: keep the first record for each
# unique (date, burn name, acres) combination, retaining all other columns.
nrow_old <- nrow(xy)
xy <- xy %>%
  distinct(Burn_Date, Burn_Unit, Acres_Burned, .keep_all = TRUE)
# Bug fix: the count was computed as nrow(xy) - nrow_old, which printed a
# negative deletion count (-398); subtract in the correct order.
deleted_n <- nrow_old - nrow(xy)
print(paste("Deleted", deleted_n, "duplicated rows"))
## [1] "Deleted 398 duplicated rows"
# Convert the cleaned table to an sf point layer (WGS84 lon/lat, EPSG:4326)
# and write it out both as an ESRI shapefile and as an .Rdata file.
# NOTE(review): the object is named `sf`, shadowing the sf package name;
# left as-is because downstream chunks reference it, but worth renaming.
sf <- st_as_sf(xy, coords = c("Longitude", "Latitude"), crs = 4326)
layer_name <- paste0("PFIRS_", min_year, "-", max_year, "_pulled", pulled_year)
# delete_layer = TRUE overwrites any previous export of the same layer.
st_write(sf, dsn = "shapefiles", layer = layer_name,
         driver = "ESRI Shapefile", delete_layer = TRUE)
## Deleting layer `PFIRS_2017-2022_pulled2023' using driver `ESRI Shapefile'
## Writing layer `PFIRS_2017-2022_pulled2023' to data source
## `shapefiles' using driver `ESRI Shapefile'
## Writing 10248 features with 8 fields and geometry type Point.
save(sf, file = paste0("Rdata/", layer_name, ".Rdata"))
# Read the California state boundary and clip the points to it.
# Fixed a misleading call: the original passed `layer = "CA_State_TIGER2016.shp"`
# INSIDE paste0(), where the argument name was silently ignored and the string
# simply concatenated onto the path. Pass the complete .shp path as dsn, which
# is what actually happened before.
CA <- st_read(dsn = paste0(ref_path,
                           "/CA boundary/ca-state-boundary/CA_State_TIGER2016.shp"))
## Reading layer `CA_State_TIGER2016' from data source
## `C:\Users\ctubbesi\OneDrive - California Air Resources Board\Documents\Reference data\CA boundary\ca-state-boundary\CA_State_TIGER2016.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 14 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -13857270 ymin: 3832931 xmax: -12705030 ymax: 5162404
## Projected CRS: WGS 84 / Pseudo-Mercator
# Reproject the boundary (Pseudo-Mercator) to match the points' CRS, then
# drop all points outside California.
CA <- st_transform(CA, st_crs(sf))
sf <- st_intersection(sf, CA)
## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data
## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
# Load the NF layer into the workspace (load() binds the object `NF`;
# presumably National Forest boundaries -- confirm against the script that
# created Rdata/NF.Rdata) and reproject it to match the points' CRS.
load(file = "Rdata/NF.Rdata")
NF <- st_transform(NF, st_crs(sf))
Switching off s2 spherical geometry makes sf fall back to planar (GEOS) operations, which prevents a geometry error in the st_difference() call below:
# Use planar (GEOS) geometry for the overlay; the "assumes planar" warnings
# below are the expected consequence for lon/lat data.
sf_use_s2(FALSE)
## Spherical geometry (s2) switched off
# Drop every point that falls inside an NF polygon.
sf_noNF <- st_difference(sf, NF)
## although coordinates are longitude/latitude, st_difference assumes that they
## are planar
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
# Visual check: NF polygons in red, surviving points in blue.
mapview::mapview(
  list(NF, sf_noNF),
  col.regions = list("red", "blue"),
  col = list("red", "blue")
)